# ------------------------- #
#    Downweight the data    #
# ------------------------- #

## Clear working space
rm(list = ls())


# Create function to set data in a range of 0 to 1
range01 <- function(x){(x-min(x, na.rm = T))/(max(x, na.rm = T)-min(x, na.rm = T))}

## Load in the dyad data
dyad_data <- readRDS("intl_order/network_layers_replication.rds")

## Make sure the sender and reciever is unique
dyad_data <- distinct(
  dyad_data, 
  ccode1, ccode2, year, .keep_all = T 
) %>% transform(
  mid = cowmidongoing 
) %>% transform(
  fatal_mid = ifelse(mid == 1 & fatality > 0, 1, 0)
) 

## Set missing distance data to missing
dyad_data$contig[dyad_data$mindist == 0] <- 1

cor_data <- dplyr::select(
  dyad_data,
  atop_defense, atop_offense, pta, igo, dcaV2
)

cor_data <- cor_data[complete.cases(cor_data),]
cor_data <- cor(cor_data)
cor_data <- data.frame(cor_data)
cor_data$sender <- row.names(cor_data) 
cor_data <- reshape2::melt(
  cor_data, 
  id = "sender"
)

cor_data <- mutate(
  cor_data, 
  sender = as.character(sender),
  sender = ifelse(sender == "atop_defense", "Defensive\nalliance", sender),
  sender = ifelse(sender == "atop_offense", "Offensive\nalliance", sender),
  sender = ifelse(sender == "dcaV2", "DCA", sender),
  sender = ifelse(sender == "pta", "PTA", sender),
  sender = ifelse(sender == "igo", "IGO", sender),
  
  variable = as.character(variable),
  variable = ifelse(variable == "atop_defense", "Defensive\nalliance", variable),
  variable = ifelse(variable == "atop_offense", "Offensive\nalliance", variable),
  variable = ifelse(variable == "dcaV2", "DCA", variable),
  variable = ifelse(variable == "pta", "PTA", variable),
  variable = ifelse(variable == "igo", "IGO", variable),
  fac_lab = "Layer correlation"
) %>% filter(
  variable != sender
)


ggplot(cor_data, aes(x = reorder(sender, -value), y = reorder(variable, -value), fill = value)) + 
  geom_tile() + 
  theme_classic() + 
  facet_wrap(~fac_lab) + 
  labs(x = "", y = "", fill = "Correlation") + 
  theme(panel.background = element_rect(fill = "black",
                                       colour = "black",
                                       linewidth = 0.5, linetype = "solid"),
        panel.grid.major = element_line(linewidth = 0.5, linetype = 'solid',
                                        colour = "black"), 
        panel.grid.minor = element_line(linewidth = 0.25, linetype = 'solid',
                                        colour = "black"),
        legend.position = "right", 
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 16),
        strip.text = element_text(size = 18, color = "black"), 
        axis.text = element_text(size = 15,  color = "black"), 
        axis.title = element_text(size = 15,  color = "black")) + 
  scale_fill_gradient(low = "white", high = "darkred")
  
dir.create("data_visualization", showWarnings = F, recursive = T)
ggsave("data_visualization/layer_corr.png", width = 9, height = 8, units = "in")  

summary(cor_data$value)
## Create a long data frame for weighting purposes
unweighted_data <- distinct(
  dyad_data, 
  ccode1, ccode2, year, .keep_all = T
) %>% mutate(
  major_power = cowmaj1  + cowmaj2,
  joint_major_power = if_else(cowmaj1 == 1 & cowmaj2 == 1, 1, 0),
  joint_region = if_else(region1 == region2, 1, 0)
) %>% dplyr::select(
  c(
    ccode1, ccode2, year, contig, 
    pta, bla, dcaV1, dcaV2,
    dipl, igo, 
    atop_defense, atop_offense, atop_neutral, atop_consul,
    joint_region,
    major_power, joint_major_power, cowc1, cowc2,
    fatal_mid, mid, region1, region2
  )
) %>% reshape2::melt(
  id = c(
    "ccode1", "ccode2", "year", "contig", 
    "joint_region", "cowc1", "cowc2",
    "major_power", "joint_major_power", "mid", "fatal_mid", "region1", "region2"
  )
) %>% dplyr::rename(
  layer = variable
) %>% mutate(
  mid_id = paste0(cowc1, "-", cowc2),
  value = if_else(is.na(value), 0, value),
  region_id = paste0(region1, "-", region2)
) 


## Manually fix missing contiguity data
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode1 == 626 & unweighted_data$ccode2 %in% c(625, 530, 500, 501, 484, 490)] <- 1
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode2 == 626 & unweighted_data$ccode1 %in% c(625, 530, 500, 501, 484, 490)] <- 1
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode1 == 626 & !(unweighted_data$ccode2 %in% c(625, 530, 500, 501, 484, 490))] <- 0
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode2 == 626 & !(unweighted_data$ccode1 %in% c(625, 530, 500, 501, 484, 490))] <- 0
unweighted_data$contig[unweighted_data$year >= 2011 & unweighted_data$ccode1 == 625 & unweighted_data$ccode2 %in% c(500, 501, 490)] <- 0
unweighted_data$contig[unweighted_data$year >= 2011 & unweighted_data$ccode2 == 625 & unweighted_data$ccode1 %in% c(500, 501, 490)] <- 0
sum(is.na(unweighted_data$contig))

## Find data to index on
max(subset(unweighted_data, contig == 1)$year)
sum(is.na(unweighted_data$major_power))

## Major power variable creation
unweighted_data$major_power <- factor(
  unweighted_data$major_power,
  levels = c("0", "1", "2")
)

## Create a year and year squared variable
input_data <- unweighted_data
input_data$year_sqr <- input_data$year ^ 2

## Normalize IGO data
igo_temp <- filter(
  input_data, 
  grepl("igo", layer)
) 

range01_data <- list()
for (i in 1:length(unique(igo_temp$layer)))
{
  temp_subset <- filter(
    igo_temp,
    layer == unique(igo_temp$layer)[i]
  )
  for (z in 1:length(unique(temp_subset$year)))
  {
    if (any(subset(temp_subset, year == unique(temp_subset$year)[z])$value != 0))
    {
      subset_by_year <- filter(
        temp_subset, 
        year == unique(temp_subset$year)[z]
      )
      
      ## Round data for replication purposes
      ## This stuff varies slightly by operating system
      subset_by_year <- transform(
        subset_by_year,
        value = round(range01(value), 2)
      )
      range01_data[[length(range01_data) + 1]] <- subset_by_year
    }
  }
}

## Bring back in the normalized IGO data
igo_temp <- do.call(rbind, range01_data)
input_data <- filter(
  input_data,
  !grepl("igo", layer)
) %>% bind_rows(
  igo_temp
) %>% mutate(
  layer = as.character(layer)
)

## Original network layers
input_data <- filter(
  input_data,
  !is.na(value),
  layer %in% c(
    "pta", 
    "atop_offense", 
    "atop_defense", 
    "igo",
    "dcaV2"
  )
) 

gc()

## Loop through the layers to weight data
layer_summary <- list()
for (i in 1:length(unique(input_data$layer)))
{
  
  
  temp_data <- filter(
    input_data, 
    layer == unique(input_data$layer)[i]
  ) %>% distinct(
    ccode1, ccode2, year, .keep_all = T
  ) %>% mutate(
    dyad_id = paste0(cowc1, "-", cowc2)
  )
  
  
  years_unique <- data.table::setDT(temp_data)[,.(n = length(unique(value))), by = 'year']
  temp_data <- filter(
    temp_data,
    year >= min(years_unique$year[years_unique$n > 1])
  )
  
  suppressWarnings({suppressMessages({layer_model <- fixest::feols(value ~ contig | year + ccode1 + ccode2, data = temp_data)})})
  temp_data$pred_values <- predict(layer_model)
  layer_summary[[i]] <- data.frame(temp_data)
}

gc()
## Merge back in weighted data
layer_summary_bind <- do.call(bind_rows, layer_summary)
layer_summary_bind[is.na(layer_summary_bind)] <- 0
layer_summary_bind$pred_values <- round(layer_summary_bind$pred_values, 2)
layer_summary_bind$tie <- layer_summary_bind$value - layer_summary_bind$pred_values


## Normalize the ties by number of layers
weighted_data <- group_by(
  layer_summary_bind,
  year
) %>% mutate(
      n = length(unique(layer))
  ) %>% ungroup(
    .
  ) %>% mutate(
    tie = tie/n,
    tie = round(tie, 2) ## Round for replication purposes across operating systems
  )

## Sum the weighted ties
weighted_data <- data.table::setDT(weighted_data)[,.(tie = sum(tie)), by = 'cowc1,cowc2,year']

## Rename weigthed ties
weighted_data <- dplyr::rename(
  weighted_data,
  namea = cowc1, 
  nameb = cowc2
) %>% data.frame(.)
  
## Create network data for analysis
adjacency_matricies <- list()
for (v in min(unweighted_data$year):2014)
{
  ## Normalize data by year
  temp_edgelist <- filter(
    weighted_data,
    year == v
  )
  
  ## Normalize between 0 and 1
  temp_edgelist$tie[temp_edgelist$tie < 0] <- NA
  temp_edgelist$tie  <- range01(temp_edgelist$tie)
  temp_edgelist$tie <- round(temp_edgelist$tie, 2)
  temp_edgelist <- arrange(
    temp_edgelist,
    namea, nameb
  ) %>% distinct(
    namea, nameb, tie
  )
  
  ## Set negative edge weights to zero
  temp_edgelist$tie[is.na(temp_edgelist$tie)] <- 0
  temp_net <- graph_from_edgelist(as.matrix(temp_edgelist[,c(1:2)]), directed = F)
  E(temp_net)$weight <- temp_edgelist$tie
  
  ## Create an adjacency matrix
  temp_adj <- as_adjacency_matrix(temp_net, attr="weight", sparse = T)
  temp_adj <- as.matrix(temp_adj)
  temp_adj <- temp_adj[order(rownames(temp_adj)),order(colnames(temp_adj))]
  adjacency_matricies[[length(adjacency_matricies) + 1]] <- as.matrix(temp_adj)
}


gc()
set.seed(9732)
## Interlayer community detection
community.blondel <- lapply(adjacency_matricies, function(x){ 
  g <- graph_from_adjacency_matrix(x, mode = "undirected", weighted = T)
  comm <- cluster_louvain(g, weights = E(g)$weight)
  glag.info <- data.frame(
    names = comm$names,
    group = comm$membership
  ) 
    return(glag.info)
}) 
  
## Define year in data and new group names by year
for (i in 1:length(community.blondel))
{
  community.blondel[[i]]$year <- i + min(unweighted_data$year) - 1
  if (i != 1)
  {
    community.blondel[[i]]$group <- community.blondel[[i]]$group + max(community.blondel[[i - 1]]$group) 
  }
}

## Named variables  
community.blondel <- do.call(bind_rows, community.blondel)
community.blondel$group <- as.numeric(as.factor(community.blondel$group))


## Check for missing
missing_data <- unique(
  c(
    paste0(subset(dyad_data, year <= 2014)$cowc1, "-", subset(dyad_data, year <= 2014)$year),
    paste0(subset(dyad_data, year <= 2014)$cowc2, "-", subset(dyad_data, year <= 2014)$year)
  )
)

missing_data[!(missing_data %in% paste0(community.blondel$names, "-", community.blondel$year))]

## Create interlayer network ties
group_matrix <- matrix(0, nrow = length(unique(community.blondel$group)), ncol = length(unique(community.blondel$group)))
colnames(group_matrix) <- rownames(group_matrix) <- unique(community.blondel$group)
    
gc()    
max_year <- plyr::ddply(
    community.blondel, 
    ~group, 
    summarize, 
    max_year = max(year)
  ) %>% arrange(
        max_year
  )

## Create data to fill in 
group_names <- unique(colnames(group_matrix))
edgelist <- data.frame(from = NA, to = NA, weight = NA)
for (i in 1:nrow(max_year))
{
  names_temp <- subset(community.blondel, group == max_year$group[i])$names
  year_temp  <- max_year$max_year[i] + 1
  temp_data <- subset(community.blondel, names %in% names_temp & year == year_temp)
  from <- max_year$group[i]
  to <- unique(temp_data$group)
  weights <- c()
      
  if (length(to) > 0)
  {
    ## Jaccard similarity for each layer
    for (z in 1:length(to))
    {
      union_data <- union(names_temp, subset(temp_data, group == to[z])$names)
      sender_vec <- ifelse(union_data %in% names_temp, 1, 0)
      reciever_vec <- ifelse(union_data %in% subset(temp_data, group == to[z])$names, 1, 0)
      weights[z] <- length(intersect(names_temp, subset(temp_data, group == to[z])$names))/length(union_data)
    }
    
    ## Create Jaccard similarity edge list
    edgelist <- bind_rows(
      edgelist,
      data.frame(
        from = rep(from, length(to)),
        to = to,
        weight = weights
      )
      ) %>% filter(
        !is.na(from)
      )
    }
}

## Round for replication purposes
edgelist$weight <- round(edgelist$weight, 2)

## Fill in an empty matrix for edge weights
for (i in 1:nrow(edgelist))
{
  group_matrix[rownames(group_matrix) == edgelist$from[i],
               colnames(group_matrix) == edgelist$to[i]] <- edgelist$weight[i] 
  
  group_matrix[rownames(group_matrix) == edgelist$to[i], 
                   colnames(group_matrix) == edgelist$from[i]] <- edgelist$weight[i] 
}
    
## Create self loop for 2 times Jaccard similarity
diag(group_matrix) <- 2
group_matrix[group_matrix < 0] <- 0
 
## Run interlayer community detection   
gc()    
g <- graph_from_adjacency_matrix(group_matrix, mode = "undirected", weighted = T)
set.seed(477)
comm <- cluster_louvain(g, weights = E(g)$weight)
between.group <- data.frame(
  group = comm$names,
  interlayer = comm$membership
)
    
## Merge in data
community.blondel <- mutate(
  community.blondel,
    group = as.character(group)
  ) %>% left_join(
    between.group,
      by = c(
        "group"
      )
) 
  
## Find group size for data  
group.size <- mutate(
  community.blondel,
    n = 1
  ) %>% aggregate(
    n ~ year + interlayer, data = ., FUN = sum
)

## Merge in group size    
community.blondel <- left_join(
  community.blondel,
  group.size,
    by = c(
      "year", "interlayer"
    )
)

## Are any missing? 
sum(is.na(community.blondel$interlayer))
missing_data[!(missing_data %in% paste0(community.blondel$names, "-", community.blondel$year))]


war.data.temp.test <- distinct(
  unweighted_data, 
  cowc1, cowc2, year, contig, mid, fatal_mid, .keep_all = T
  ) %>% left_join(
  community.blondel,
    by = c(
        "cowc1" = "names", "year"
      )
    ) %>% left_join(
      community.blondel,
      by = c(
        "cowc2" = "names", "year"
      )
    ) %>% mutate(
      dyad = paste(ccode1, ccode2, sep = "-"),
      contig = if_else(contig %in% 1:2, 1, 0),
      year_sqr = year ^ 2, 
      out_group = if_else(interlayer.x != interlayer.y, 1, 0)
)

## Create merging data
ccode_merge <- data.frame(
  ccode = c(dyad_data$ccode1, dyad_data$ccode2),
  names = c(dyad_data$cowc1, dyad_data$cowc2)
) %>% distinct(
  names, ccode
)


## Merge in communities to dyads
community.blondel <- left_join(
  community.blondel, 
  ccode_merge,
  by = "names"
)

## Save output matrix, community, weighted data, and unweighted data
saveRDS(adjacency_matricies, file = "intl_order/adjacency_matricies_replication.rds")
saveRDS(community.blondel, file = "intl_order/international_order_replication.rds")
saveRDS(weighted_data, file = "intl_order/weighted_data_replication.rds")
saveRDS(unweighted_data, file = "intl_order/unweighted_data_replication.rds")
